{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "w_r-Acp_4NTz",
    "outputId": "d736e906-2ddb-4401-a3bd-31ad7dc218a9"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: llmware in /usr/local/lib/python3.10/dist-packages (0.3.0)\n",
      "Requirement already satisfied: boto3>=1.24.53 in /usr/local/lib/python3.10/dist-packages (from llmware) (1.34.120)\n",
      "Requirement already satisfied: huggingface-hub>=0.19.4 in /usr/local/lib/python3.10/dist-packages (from llmware) (0.23.2)\n",
      "Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.10/dist-packages (from llmware) (1.25.2)\n",
      "Requirement already satisfied: pymongo>=4.7.0 in /usr/local/lib/python3.10/dist-packages (from llmware) (4.7.3)\n",
      "Requirement already satisfied: tokenizers>=0.15.0 in /usr/local/lib/python3.10/dist-packages (from llmware) (0.19.1)\n",
      "Requirement already satisfied: psycopg-binary==3.1.17 in /usr/local/lib/python3.10/dist-packages (from llmware) (3.1.17)\n",
      "Requirement already satisfied: psycopg==3.1.17 in /usr/local/lib/python3.10/dist-packages (from llmware) (3.1.17)\n",
      "Requirement already satisfied: pgvector==0.2.4 in /usr/local/lib/python3.10/dist-packages (from llmware) (0.2.4)\n",
      "Requirement already satisfied: colorama==0.4.6 in /usr/local/lib/python3.10/dist-packages (from llmware) (0.4.6)\n",
      "Requirement already satisfied: librosa>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from llmware) (0.10.2.post1)\n",
      "Requirement already satisfied: typing-extensions>=4.1 in /usr/local/lib/python3.10/dist-packages (from psycopg==3.1.17->llmware) (4.12.1)\n",
      "Requirement already satisfied: botocore<1.35.0,>=1.34.120 in /usr/local/lib/python3.10/dist-packages (from boto3>=1.24.53->llmware) (1.34.120)\n",
      "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from boto3>=1.24.53->llmware) (1.0.1)\n",
      "Requirement already satisfied: s3transfer<0.11.0,>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from boto3>=1.24.53->llmware) (0.10.1)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->llmware) (3.14.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->llmware) (2023.6.0)\n",
      "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->llmware) (24.0)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->llmware) (6.0.1)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->llmware) (2.31.0)\n",
      "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.19.4->llmware) (4.66.4)\n",
      "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (3.0.1)\n",
      "Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (1.11.4)\n",
      "Requirement already satisfied: scikit-learn>=0.20.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (1.2.2)\n",
      "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (1.4.2)\n",
      "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (4.4.2)\n",
      "Requirement already satisfied: numba>=0.51.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (0.58.1)\n",
      "Requirement already satisfied: soundfile>=0.12.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (0.12.1)\n",
      "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (1.8.1)\n",
      "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (0.3.7)\n",
      "Requirement already satisfied: lazy-loader>=0.1 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (0.4)\n",
      "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.10/dist-packages (from librosa>=0.10.0->llmware) (1.0.8)\n",
      "Requirement already satisfied: dnspython<3.0.0,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from pymongo>=4.7.0->llmware) (2.6.1)\n",
      "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.120->boto3>=1.24.53->llmware) (2.8.2)\n",
      "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /usr/local/lib/python3.10/dist-packages (from botocore<1.35.0,>=1.34.120->boto3>=1.24.53->llmware) (2.0.7)\n",
      "Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.51.0->librosa>=0.10.0->llmware) (0.41.1)\n",
      "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.10/dist-packages (from pooch>=1.1->librosa>=0.10.0->llmware) (4.2.2)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.4->llmware) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.4->llmware) (3.7)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.19.4->llmware) (2024.6.2)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.20.0->librosa>=0.10.0->llmware) (3.5.0)\n",
      "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.10/dist-packages (from soundfile>=0.12.1->librosa>=0.10.0->llmware) (1.16.0)\n",
      "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0->soundfile>=0.12.1->librosa>=0.10.0->llmware) (2.22)\n",
      "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.35.0,>=1.34.120->boto3>=1.24.53->llmware) (1.16.0)\n",
      "Collecting lancedb\n",
      "  Downloading lancedb-0.8.2-cp38-abi3-manylinux_2_28_x86_64.whl (19.1 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.1/19.1 MB\u001b[0m \u001b[31m28.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting deprecation (from lancedb)\n",
      "  Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)\n",
      "Collecting pylance==0.12.1 (from lancedb)\n",
      "  Downloading pylance-0.12.1-cp39-abi3-manylinux_2_28_x86_64.whl (23.1 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.1/23.1 MB\u001b[0m \u001b[31m26.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting ratelimiter~=1.0 (from lancedb)\n",
      "  Downloading ratelimiter-1.2.0.post0-py3-none-any.whl (6.6 kB)\n",
      "Requirement already satisfied: requests>=2.31.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.31.0)\n",
      "Collecting retry>=0.9.2 (from lancedb)\n",
      "  Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n",
      "Requirement already satisfied: tqdm>=4.27.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (4.66.4)\n",
      "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.7.3)\n",
      "Requirement already satisfied: attrs>=21.3.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (23.2.0)\n",
      "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from lancedb) (24.0)\n",
      "Requirement already satisfied: cachetools in /usr/local/lib/python3.10/dist-packages (from lancedb) (5.3.3)\n",
      "Collecting overrides>=0.7 (from lancedb)\n",
      "  Downloading overrides-7.7.0-py3-none-any.whl (17 kB)\n",
      "Requirement already satisfied: pyarrow<15.0.1,>=12 in /usr/local/lib/python3.10/dist-packages (from pylance==0.12.1->lancedb) (14.0.2)\n",
      "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from pylance==0.12.1->lancedb) (1.25.2)\n",
      "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (2.18.4)\n",
      "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (4.12.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (3.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2.0.7)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.31.0->lancedb) (2024.6.2)\n",
      "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.10/dist-packages (from retry>=0.9.2->lancedb) (4.4.2)\n",
      "Collecting py<2.0.0,>=1.4.26 (from retry>=0.9.2->lancedb)\n",
      "  Downloading py-1.11.0-py2.py3-none-any.whl (98 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hInstalling collected packages: ratelimiter, py, overrides, deprecation, retry, pylance, lancedb\n",
      "Successfully installed deprecation-2.1.0 lancedb-0.8.2 overrides-7.7.0 py-1.11.0 pylance-0.12.1 ratelimiter-1.2.0.post0 retry-0.9.2\n",
      "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.41.2)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.14.0)\n",
      "Requirement already satisfied: huggingface-hub<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.23.2)\n",
      "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n",
      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.5.15)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n",
      "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
      "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.3)\n",
      "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.4)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (2023.6.0)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (4.12.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.7)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.6.2)\n",
      "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.3.0+cu121)\n",
      "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.14.0)\n",
      "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.1)\n",
      "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12.1)\n",
      "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.3)\n",
      "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n",
      "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch) (8.9.2.26)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.0.106)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch) (2.20.5)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
      "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.3.0)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.5.40)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n",
      "Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install llmware\n",
    "!pip install lancedb\n",
    "!pip install transformers\n",
    "!pip install torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "id": "9ionfrAq4YJ_"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from llmware.library import Library\n",
    "from llmware.retrieval import Query\n",
    "from llmware.setup import Setup\n",
    "from llmware.status import Status\n",
    "from llmware.models import ModelCatalog\n",
    "from llmware.configs import LLMWareConfig, MilvusConfig\n",
    "\n",
    "from importlib import util\n",
    "\n",
    "\n",
    "if not util.find_spec(\"torch\") or not util.find_spec(\"transformers\"):\n",
    "  print(\"\\nto run this example, with the selected embedding model, please install transformers and torch, e.g., \"\n",
    "        \"\\n`pip install torch`\"\n",
    "        \"\\n`pip install transformers`\")\n",
    "\n",
    "if not (util.find_spec(\"chromadb\") or util.find_spec(\"pymilvus\") or util.find_spec(\"lancedb\") or util.find_spec(\"faiss\")):\n",
    "  print(\"\\nto run this example, you will need to pip install the vector db drivers. see comments above,\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "id": "63G2yNI74dCX"
   },
   "outputs": [],
   "source": [
    "LLMWareConfig.set_active_db(\"sqlite\")\n",
    "LLMWareConfig().set_vector_db(\"lancedb\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "r-ApXhrB6z57",
    "outputId": "6e31467c-2283-4b6d-f724-37c1af942836"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "update: Creating library: example2_library\n"
     ]
    }
   ],
   "source": [
    "print(\"\\nupdate: Creating library: {}\".format(\"example2_library\"))\n",
    "library = Library().create_new_library(\"example2_library\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "SNeDdWMa7jHE",
    "outputId": "27cbb2fa-cc50-42cb-c328-d96dd7c37023"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "embedding record - before embedding  [{'embedding_status': 'no', 'embedding_model': 'none', 'embedding_db': 'none', 'embedded_blocks': 0, 'embedding_dims': 0, 'time_stamp': 'NA'}]\n"
     ]
    }
   ],
   "source": [
    "embedding_record = library.get_embedding_status()\n",
    "print(\"embedding record - before embedding \", embedding_record)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Gosqszd47qRn",
    "outputId": "b0117f31-c657-4fa4-f580-f8a9d033c4a2"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "update: Downloading Sample Files\n"
     ]
    }
   ],
   "source": [
    "print(\"update: Downloading Sample Files\")\n",
    "sample_files_path = Setup().load_sample_files(over_write=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "ESrJe7bq7y1x",
    "outputId": "129c154a-d17f-4e60-c610-c3e5bb5bdb8f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "update: Parsing and Text Indexing Files\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'docs_added': 0,\n",
       " 'blocks_added': 0,\n",
       " 'images_added': 0,\n",
       " 'pages_added': 0,\n",
       " 'tables_added': 0,\n",
       " 'rejected_files': []}"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(\"update: Parsing and Text Indexing Files\")\n",
    "library.add_files(input_folder_path=os.path.join(sample_files_path, \"Agreements\"), chunk_size=400, max_chunk_size=600, smart_chunking=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "id": "hBMo7HHv8TCy"
   },
   "outputs": [],
   "source": [
    "embedding_models = ModelCatalog().list_embedding_models()\n",
    "embedding_model = \"mini-lm-sbert\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "ZDgZI1I68nBq",
    "outputId": "88ccf03c-a4f4-46fc-992d-11747e1e22f0"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "update: Starting the Embedding: library - example2_library - vector_db - lancedb - model - mini-lm-sbert\n"
     ]
    }
   ],
   "source": [
    "library_name = library.library_name\n",
    "vector_db = LLMWareConfig().get_vector_db()\n",
    "print(f\"\\nupdate: Starting the Embedding: \"\n",
    "      f\"library - {library_name} - \"\n",
    "      f\"vector_db - {vector_db} - \"\n",
    "      f\"model - {embedding_model}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "ySGrOKU4-6ig",
    "outputId": "cb01d8fd-1f61-44e7-ddf3-77b166d739e4"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 100 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 200 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 300 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 400 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 500 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 600 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 700 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 800 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 900 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1000 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1100 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1200 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1300 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1400 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1500 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1600 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1700 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1800 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 1900 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 2000 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 2100 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 2200 of 2211\n",
      "INFO:llmware.embeddings:update: embedding_handler - Lancedb - Embeddings Created: 2211 of 2211\n",
      "INFO:llmware.embeddings:update: EmbeddingHandler - Lancedb - embedding_summary - {'embeddings_created': 2211, 'embedded_blocks': 2211, 'embedding_dims': 384, 'time_stamp': 'Thu Jun  6 16:09:59 2024'}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'embeddings_created': 2211,\n",
       " 'embedded_blocks': 2211,\n",
       " 'embedding_dims': 384,\n",
       " 'time_stamp': 'Thu Jun  6 16:09:59 2024'}"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "library.install_new_embedding(embedding_model_name=embedding_model, vector_db=vector_db,batch_size=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "lmppxssq--H3",
    "outputId": "f825b091-ea64-48fe-8c64-3ed61a482c6b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "update: Embeddings Complete - Status() check at end of embedding -  [{'_id': 2, 'key': 'example2_library_embedding_mini-lm-sbert', 'summary': '2211 of 2211 blocks', 'start_time': '1717690179.087806', 'end_time': '1717690199.5373614', 'total': 2211, 'current': 2211, 'units': 'blocks'}]\n"
     ]
    }
   ],
   "source": [
    "update = Status().get_embedding_status(library_name, embedding_model)\n",
    "print(\"update: Embeddings Complete - Status() check at end of embedding - \", update)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "neWoHMwc_Mtm",
    "outputId": "a7655116-4627-4c67-b173-de1b9a69bb4d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "update: Run a sample semantic/vectory query: incentive compensation\n"
     ]
    }
   ],
   "source": [
    "sample_query = \"incentive compensation\"\n",
    "print(\"\\n\\nupdate: Run a sample semantic/vectory query: {}\".format(sample_query))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "tPJEgjUb_WiI",
    "outputId": "1f46d14c-596b-45ae-dae5-858429050589"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "update: query results - 0 - document - Artemis Poseidon EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.24837934970855713 \n",
      "update: text sample -  actual   incentive bonus (“Incentive Bonus”) for any fiscal year as determined by the Board (or the compensation   committee  ... \n",
      "\n",
      "update: query results - 1 - document - Athena EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.24837934970855713 \n",
      "update: text sample -  actual   incentive bonus (“Incentive Bonus”) for any fiscal year as determined by the Board (or the compensation   committee  ... \n",
      "\n",
      "update: query results - 2 - document - Amphitrite EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.2483793944120407 \n",
      "update: text sample -  actual   incentive bonus (“Incentive Bonus”) for any fiscal year as determined by the Board (or the compensation   committee  ... \n",
      "\n",
      "update: query results - 3 - document - Apollo EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.24960115551948547 \n",
      "update: text sample -  actual   incentive bonus (“Incentive Bonus”) for any fiscal year as determined by the Board (or the compensation   committee  ... \n",
      "\n",
      "update: query results - 4 - document - Aphrodite EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.24960121512413025 \n",
      "update: text sample -  actual   incentive bonus (“Incentive Bonus”) for any fiscal year as determined by the Board (or the compensation   committee  ... \n",
      "\n",
      "update: query results - 5 - document - Bia EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.2503372132778168 \n",
      "update: text sample -  actual   incentive bonus (“Incentive Bonus”) for any fiscal year as determined by the Board (or the compensation   committee  ... \n",
      "\n",
      "update: query results - 6 - document - Persephone EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.269817054271698 \n",
      "update: text sample -  to participate in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in ex ... \n",
      "\n",
      "update: query results - 7 - document - Rhea EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.26981714367866516 \n",
      "update: text sample -  to participate in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in ex ... \n",
      "\n",
      "update: query results - 8 - document - Metis EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.26981714367866516 \n",
      "update: text sample -  to participate in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in ex ... \n",
      "\n",
      "update: query results - 9 - document - Gaia EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.27305811643600464 \n",
      "update: text sample -  in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in existence for oth ... \n",
      "\n",
      "update: query results - 10 - document - Eileithyia EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.27305811643600464 \n",
      "update: text sample -  in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in existence for oth ... \n",
      "\n",
      "update: query results - 11 - document - Demeter EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.2812928557395935 \n",
      "update: text sample -  participate in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in exist ... \n",
      "\n",
      "update: query results - 12 - document - Nike EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.28129294514656067 \n",
      "update: text sample -  participate in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in exist ... \n",
      "\n",
      "update: query results - 13 - document - Leto EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.28129294514656067 \n",
      "update: text sample -  participate in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in exist ... \n",
      "\n",
      "update: query results - 14 - document - Nyx EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 3 distance - 0.28129294514656067 \n",
      "update: text sample -  participate in Employer's annual cash incentive   bonus plan (the “Plan”), based on the same terms and conditions as in exist ... \n",
      "\n",
      "update: query results - 15 - document - Gaia EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.3150967061519623 \n",
      "update: text sample -  target annual bonus under the Plan shall be 80% of Base Salary (“Target Bonus”), but Executive's actual   incentive bonus (“I ... \n",
      "\n",
      "update: query results - 16 - document - Eileithyia EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.3150967061519623 \n",
      "update: text sample -  target annual bonus under the Plan shall be 80% of Base Salary (“Target Bonus”), but Executive's actual   incentive bonus (“I ... \n",
      "\n",
      "update: query results - 17 - document - Metis EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.3150967061519623 \n",
      "update: text sample -  target annual bonus under the Plan shall be 80% of Base Salary (“Target Bonus”), but Executive's actual   incentive bonus (“I ... \n",
      "\n",
      "update: query results - 18 - document - Rhea EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.3150967061519623 \n",
      "update: text sample -  target annual bonus under the Plan shall be 80% of Base Salary (“Target Bonus”), but Executive's actual   incentive bonus (“I ... \n",
      "\n",
      "update: query results - 19 - document - Leto EXECUTIVE EMPLOYMENT AGREEMENT.pdf - page num - 4 distance - 0.31509676575660706 \n",
      "update: text sample -  target annual bonus under the Plan shall be 80% of Base Salary (“Target Bonus”), but Executive's actual   incentive bonus (“I ... \n"
     ]
    }
   ],
   "source": [
    "query_results = Query(library).semantic_query(sample_query, result_count=20)\n",
    "for i, entries in enumerate(query_results):\n",
    "  text = entries[\"text\"]\n",
    "  document_source = entries[\"file_source\"]\n",
    "  page_num = entries[\"page_num\"]\n",
    "  vector_distance = entries[\"distance\"]\n",
    "\n",
    "  if len(text) > 125: text = text[0:125] + \" ... \"\n",
    "\n",
    "  print(\"\\nupdate: query results - {} - document - {} - page num - {} distance - {} \".format(i, document_source, page_num, vector_distance))\n",
    "\n",
    "  print(\"update: text sample - \", text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "EpmAWSF0_8uP",
    "outputId": "f80b9a23-e8c5-4020-eefe-a81579554967"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "update: embedding record -  [{'embedding_status': 'yes', 'embedding_model': 'mini-lm-sbert', 'embedding_db': 'lancedb', 'embedding_dims': 384, 'embedded_blocks': 2211, 'time_stamp': 'Thu Jun  6 16:09:59 2024'}]\n"
     ]
    }
   ],
   "source": [
    "embedding_record = library.get_embedding_status()\n",
    "print(\"\\nupdate: embedding record - \", embedding_record)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
